Partial Identification Sandbox

Extending the Policy Evaluation Toolkit

Partial Identification
Causal Inference
Author

John Graves

Published

December 19, 2022

Code
library(tidyverse)
library(here)
library(glue)
library(copula)
library(knitr)
library(kableExtra)
library(ggsci)
library(MASS)
library(psych)
library(patchwork)
library(directlabels)
library(progress)
library(latex2exp)
select <- dplyr::select
options("scipen"=100, "digits"=6)
theme_set(hrbrthemes::theme_ipsum() + theme(legend.position = "top", panel.grid.minor.y = element_blank()) )

library(tufte)
# invalidate cache when the tufte version changes
knitr::opts_chunk$set(cache.extra = packageVersion('tufte'))
options(htmltools.dir.version = FALSE)

get_distribution_function <- function(x, fn = "pdf") {
  # Fit a smooth, monotone spline to the empirical CDF of x and return either
  # an approximating PDF (the spline's derivative) or CDF as a function of x.
  n.knots <- 20
  dat <- tibble(x = x, cdf = ecdf(x)(x))

  n <- length(dat$x)
  fit <- scam::scam(cdf ~ s(x, bs = "mpi", k = n.knots), data = dat,
                    weights = c(n, rep(1, n - 2), 10 * n))
  ## interior knots
  xk <- with(fit$smooth[[1]], knots[4:(length(knots) - 3)])
  ## spline values at interior knots
  yk <- predict(fit, newdata = data.frame(x = xk))
  ## reparameterization into a monotone interpolation spline
  f <- stats::splinefun(xk, yk, "hyman")

  if (fn == "pdf") out <- approxfun(x, f(x, 1))
  if (fn == "cdf") {
    out <- approxfun(x, f(x),
                     method = "constant",
                     yleft = 0,
                     yright = 1,
                     f = 0)
  }
  return(out)
}
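
# Hedged usage sketch (illustrative only; x_test, cdf_hat, pdf_hat, and grid_ are
# names introduced here and not used elsewhere). A deterministic grid of normal
# quantiles is used so the RNG state is left untouched.
x_test  <- qnorm(ppoints(500), mean = 2, sd = 1)
cdf_hat <- get_distribution_function(x_test, fn = "cdf")
pdf_hat <- get_distribution_function(x_test, fn = "pdf")
cdf_hat(2)                                          # should be close to 0.5
grid_   <- seq(min(x_test), max(x_test), length.out = 1000)
sum(pdf_hat(grid_), na.rm = TRUE) * diff(grid_)[1]  # should be close to 1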

Outline

  1. Introduction
  2. Decision-Relevant Quantities of Interest
  • Individual-level treatment effects are of policy interest because they allow us to characterize the distribution of treatment effects, not just some summary (e.g., the average) in a population.
    • What fraction of the population is harmed?
    • What fraction experiences substantial vs. minimal gain?
    • What is the average treatment response?
    • What is the median treatment response? The 25th percentile? The 75th percentile?
  3. The Perfect Data
  4. Credible Identification in the Real World
  • Even if the marginal distributions of the outcome are identified (as they are under random assignment or under unconfounded selection), the “copula” that binds them together is unidentified.
  • This section covers identification under the most ideal (yet realistic) real-world scenario: an experimental design with perfect compliance.
  • In that case we can identify the marginal distributions of the potential outcomes under treatment and non-treatment.
  5. Expanding the Toolkit via Partial Identification
  • Copulas and the relationship between marginal and joint distributions.
    • Brief theory
    • Simulating from a copula
  • Copulas and their relationship to treatment effects and decision-relevant quantities of interest.
  6. Worst-Case Bounds
  • Frechet-Hoeffding
  • Kolmogorov
  • Williamson and Downs
  • Firpo and Ridder?
  7. Tightening the Bounds with Covariates
  • Constructing conditional eCDFs
  8. Tightening the Bounds Using Additional Identifying Assumptions (Experimental Designs)
  • Frandsen and Lefgren (2021)
  9. Partial Identification with an Exogenous Instrument
  • Manski IV bounds
  • Principal Stratification
  • Frandsen and Lefgren (2021)
  10. Partial Identification in Observational Settings
  • Callaway (Panel Data)
  • Changes-in-Changes
  • Heckman: Selection
  • Manski: Monotone Treatment Response
  • Manski: Monotone Treatment Selection
Code
df <- # basic perfect doctor data (from Rubin)
  data.frame(patient = LETTERS[1:10],
            Y_1 = c(7,5,5,7,4,10,1,5,3,9),
             Y_0 = c(5,3,8,8,3,1,6,8,8,4))  %>% 
  mutate(Y_0 = c(1,6,1,8,2,1,10,6,7,8)) %>% 
  mutate(delta = Y_1 - Y_0) %>% 
  mutate(D = c(1,1,1,1,1,0,0,0,0,0)) %>% 
  mutate(Y_obs = case_when(D==1 ~ Y_1, TRUE ~ Y_0)) %>% 
  mutate(sim = 0) %>% 
  mutate(X = rnorm(10,mean = 0, sd = 1)) 

# Simulate additional data from a copula that defines the correlation b/t potential outcomes, and
# between a single covariate X and the potential outcomes. 
m <- 3 # number of columns
n <- 10000-10 # sample size
corXY <- 0.8 # correlation between potential outcomes and X
sigma <- matrix(c(1,0.6,corXY,0.6,1,corXY,corXY,corXY,1),nrow = 3) # correlation matrix

# Sample the joint CDF quantiles from a multivariate normal centered at 0
set.seed(100)
z <- mvrnorm(n,mu=rep(0, m),Sigma=sigma,empirical=T) 
u <- pnorm(z) # get the quantiles associated with each value

X <-  # a single covariate that is predictive of the outcome. 
  qnorm(u[,3],mean = 0, sd = 1)

# Generate the treatment effect. Note that this initial draw (mean 2, SD 1) is
# immediately overwritten below so that the effect increases with X
# (mean 1, SD 1 by construction).
delta <-
  rnorm(n,mean = 2, sd = 1)

delta <-
  1 + ((X - mean(X)) / ( sd(X)))

sY_0 <- # simulated potential outcome under non-Tx
  qpois(u[,2],lambda=mean(df[df$D==1,]$Y_1)) + rnorm(n=n)

sY_1 <- # simulated potential outcome under Tx
  qpois(u[,1],lambda = mean(df[df$D==1,]$Y_1)) + delta

df_sim <- # Construct the final data. 
  tibble(patient = glue("sim{1:n}")) %>% 
  mutate(Y_1 = sY_1,
         Y_0 = sY_0,
         X = X) %>% 
  mutate(delta = Y_1 - Y_0) %>% 
  mutate(D = as.integer(runif(nrow(.))<.5)) %>%
  mutate(U = rnorm(nrow(.))) %>% 
  #mutate(D = as.integer(Y_1>=Y_0)) %>% 
  mutate(sim = 1)

df <- # Bind it with the general example rows (10 rows)
  df %>% 
  bind_rows(df_sim) %>% 
  mutate(Y = case_when(D==1 ~ Y_1, TRUE ~ Y_0)) 
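
# Quick sanity check on the simulated draws (illustrative; exact values are
# seed-dependent): the Gaussian copula induces positive dependence between the
# potential outcomes and between each outcome and X. The Poisson transform,
# added noise, and X-driven treatment effect attenuate the rank correlations,
# but they should remain clearly positive.
cor(df_sim$Y_1, df_sim$Y_0, method = "spearman")
cor(df_sim$Y_1, df_sim$X,   method = "spearman")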

# corUY <- 0.3
# 
# params <-
#     list(
#         n = 1e4,
#         sigma = matrix(c(1,0.8,0.9,corUY,
#                          0.8,1,0.9,corUY,
#                          0.9,0.9,1,.2,
#                          corUY,corUY,0.2,1), nrow = 4, ncol = 4, byrow = TRUE,
#                        dimnames = list(c("Y1","Y0","X1","U"),c("Y1","Y0","X1","U"))),
#         lambda = 15,
#         delta = 2
#     )
# 
# m <- ncol(params$sigma)
# 
# # Simulate from a copula
# set.seed(123)
# z <- # Draw from a multivariate normal distribution with covariance matrix sigma 
#     mvrnorm(params$n,mu=rep(0, m),Sigma=params$sigma,empirical=T) 
# u <- # obtain the quantiles associated with each value
#     pnorm(z) 
# 
# Y_0 <- 
#     qpois(u[,1], params$lambda) + rnorm(n = params$n, mean = 0, sd = 1) 
# 
# U <-
#     qnorm(u[,4], mean = 0, sd = 1)
# 
# tmp_ <- 1  +  (Y_0 - mean(Y_0)) / (1*sd(Y_0))
# 
# tmp_ <- 1  +  (U - mean(U)) / (1*sd(U))
# 
# Y_1 <- 
#     Y_0 + rpois(n = params$n, lambda = params$delta) * tmp_  + U
# 
# X1 <- 
#     qnorm(u[,3], mean = 0, sd = 1)
# 
# df <- 
#     tibble(study_id = paste0("ID",1:params$n), Y_0 = Y_0, Y_1 = Y_1, X1 = X1, U = U)
# 
df %>%
    head(n = 10) %>%
    select(patient, Y_0, Y_1) %>%
    kable(digits = 2) %>%
    kable_styling()
patient Y_0 Y_1
A 1 7
B 6 5
C 1 5
D 8 7
E 2 4
F 1 10
G 10 1
H 6 5
I 7 3
J 8 9
Code
df %>% 
    # Note: sample() without a size argument simply permutes the row indices, so
    # this filter keeps every row; supply a size argument to plot a random subsample.
    filter(row_number() %in% sample(1:nrow(.),replace = FALSE)) %>% 
    ggplot(aes(x = Y_0, y = Y_1)) + stat_ellipse() + 
    geom_point()

Figure 1: Scatterplot of Potential Outcomes with 95% Ellipse

Because we can calculate \(Y_i(1)-Y_i(0)\) directly, we can plot its distribution:

Code
df %>% 
  ggplot(aes(x = delta)) + geom_density() +
  labs(x = TeX("$\\Delta$"), y = "Density")+
  geom_vline(aes(xintercept =  mean(delta)),lty=2,lwd=1.1, col = "darkred") + 
  annotate("text", x = mean(delta), y = .1, hjust=-.1, label = "Avg. Treatment Effect", col = "darkred") + 
  scale_x_continuous(limits = c(min(df$delta),max(df$delta)),breaks = seq(-8,10,2)) + 
  geom_vline(aes(xintercept = 0),lty=3) 

Figure 2: Distribution of the Treatment Effect (delta)

We can also explore whether the treatment effect magnitude varies by some covariate \(X\). In Figure 3, we see that the treatment effect tends to be larger for larger values of \(X\).

Code
# Source: https://stackoverflow.com/questions/61589781/plot-vertical-density-of-normal-distribution-in-r-and-ggplot2

taus <- seq(0,1,0.01) 
quantreg_fit <- 
  quantreg::rq(delta ~ X, tau = taus, data = df)
pvals <- predict(quantreg_fit, stepfun=TRUE)
X <- df$X

get_delta_dist <- function(qq, scaling = 10) {
  qq %>% map_df(~({
    delta_ <- seq(min(df$delta), max(df$delta), length.out = 1e4)
    hatF <- pvals[[which(abs(ecdf(X)(X) - .x) < 0.01)[1]]]
    pdfX <- get_distribution_function(hatF(taus),  fn = "pdf")
    pdf_ <-
      tibble(y = delta_, x = 
                pdfX(delta_)) %>%
      na.omit()
    
    pdf_norm <- # The PDF must integrate to one, but sometimes it's a bit off,
      # so we re-normalize it to integrate to one. 
      sum(c(0,diff(delta_))*pdfX(delta_),na.rm=TRUE)
    
    mm <- 
      sum(delta_ * c(0,diff(delta_))* (pdfX(delta_)/pdf_norm),na.rm=TRUE)
  

    pdf_ %>% mutate(X = X[which(abs(ecdf(X)(X) - .x) < 0.01)[1]]) %>% 
      mutate(tau = .x) %>% 
      mutate(mean_delta = mm)
  })) %>% 
  mutate(max = max(x)) %>% 
  mutate(x = (x))
  
}

df_condpdf <- 
  get_delta_dist(c(.01,0.05,0.25,0.5,0.75,0.95)) %>% 
  mutate(x = X + x * 10)  %>% 
  mutate(X_ = X) %>% 
  mutate(X = factor(X, labels = unique(.$tau))) %>% 
  filter(abs(x-X_)>0.03)

df %>% 
  ggplot(aes(x= X, y = delta)) + geom_point(alpha = 0.25) + 
  geom_path(data = df_condpdf, aes(x=x,y=y, colour = X),lwd=2)  + 
  scale_x_continuous(breaks = round(unique(df_condpdf$X_),2)) +
  scale_y_continuous(breaks = c(seq(-20,20,10),round(unique(df_condpdf$mean_delta),2)),guide = guide_axis(n.dodge=2)) + 
  scale_color_aaas(name="Quantile of X") + 
  theme(legend.position = "top", panel.grid.minor.y = element_blank()) +
  labs(y = TeX("$\\Delta$"), x = "Covariate (X) Value") +
  geom_hline(aes(yintercept = 0),lwd=1.25)

Figure 3: Treatment Response by Covariate Value

We can also plot the cumulative distribution function of the treatment response. This is shown in Figure 4.

Code
delta_ <- sort(df %>%  pull(delta)) 
df_ <- data.frame(x = delta_, y = seq_along(delta_)/length(delta_), region = delta_ < 0 )

br0_ <- unique(sort(c(seq(0,1,0.2),ecdf(delta_)(0))))
br0_l <- paste0(round(br0_,2))

figdelta <- 
  ggplot(df_, aes(x, y)) +
  geom_area(aes(fill = region)) +
  geom_line() +
  labs(x = "x = Treatment Effect", y = TeX("$Pr(\\Delta$<=x)"))+
  scale_fill_manual(values = c('lightgrey', '#C14E4295'), guide = "none") +
  annotate('text', x = 0, y = 0.08, label = 'Harmed', col = "black",size = 4, hjust=1.1) +
   geom_vline(aes(xintercept = mean(delta_)),lty=2,lwd=1.1, col = "darkred") + # mark the realized mean rather than a hard-coded value
  annotate("text", x = mean(delta_), y = 0.8, hjust=-0.1, label = "Avg. Treatment Effect", col = "darkred", size = 4) +
  scale_y_continuous(breaks = br0_, labels = br0_l) +
  geom_point(data = tibble(x = 0, y = ecdf(delta_)(0)), aes(x = x, y = y),size=5, pch=10) 

figdelta

Figure 4: Cumulative Distribution Function of the Treatment Effect (delta)

Decision-Relevant Quantities of Interest

Table 1 summarizes various quantities of interest we can calculate with full information on the joint distribution of potential outcomes.

Code
tx_effect <- 
    df %>%  pull(delta)

delta_ <- 
    seq(floor(min(tx_effect)), ceiling(max(tx_effect)),length.out = 1e6)

avg_delta <- 
    sum(delta_ * c(0,diff(ecdf(tx_effect)(delta_))))

frac_harmed <- 
    ecdf(tx_effect)(-1)

frac_minimal_harm <- 
    ecdf(tx_effect)(0)-ecdf(tx_effect)(-1)

frac_low <- 
    ecdf(tx_effect)(1)-ecdf(tx_effect)(0)

frac_high <- 
    1 - ecdf(tx_effect)(1)

q_delta  <- 
    quantile(ecdf(tx_effect),c(0.1,0.5,0.75))

quantities <- 
    tibble(average = avg_delta,
           q10 = q_delta[1],
           q50 = q_delta[2],
           q75 = q_delta[3],
           harm = 100* frac_harmed,
           minimal_harm = 100* frac_minimal_harm,
           low = 100*frac_low,
           high= 100*frac_high) %>% 
    gather(quantity,value) %>% 
    mutate(quantity = factor(quantity, levels = c("average","q10","q50","q75","harm","minimal_harm","low","high"),
                             labels = c("Average Treatment Response",
                                        "25th %ile of Tx Response",
                                        "50th %ile of Tx Response",
                                        "75th %ile of Tx Reponse",
                                        "Percent with Substantial Harm",
                                        "Percent with Minimal Harm",
                                        "Percent with Low Benefit",
                                        "Percent with High Benefit"))) %>% 
    mutate(formula = c("$\\int t d \\hat  F_{\\Delta}(t)$","$\\hat  F^{-1}_{\\Delta}(0.10)$","$\\hat F^{-1}_{\\Delta}(0.50)$","$\\hat F^{-1}_{\\Delta}(0.75)$","$\\hat F_{\\Delta}(-1)$",
                       "$\\hat F_{\\Delta}(0)-\\hat F_{\\Delta}(-1)$","$\\hat F_{\\Delta}(1)-\\hat F_{\\Delta}(0)$","$1 - \\hat F_{\\Delta}(1)$"))

quantities %>% 
    select(quantity,formula,value) %>% 
    kable(booktabs = T ,escape = F, align = 'l', format = "markdown",
          col.names = c("Quantity of Interest","Estimator","Value"), digits =2) %>%
    kable_classic(full_width = F,
                  position = "center",font_size = 35) 
Table 1: Quantities of Interest
Quantity of Interest Estimator Value
Average Treatment Response \(\int t d \hat F_{\Delta}(t)\) 0.99
10th %ile of Tx Response \(\hat F^{-1}_{\Delta}(0.10)\) -2.27
50th %ile of Tx Response \(\hat F^{-1}_{\Delta}(0.50)\) 0.93
75th %ile of Tx Response \(\hat F^{-1}_{\Delta}(0.75)\) 2.68
Percent with Substantial Harm \(\hat F_{\Delta}(-1)\) 22.46
Percent with Minimal Harm \(\hat F_{\Delta}(0)-\hat F_{\Delta}(-1)\) 13.12
Percent with Low Benefit \(\hat F_{\Delta}(1)-\hat F_{\Delta}(0)\) 15.61
Percent with High Benefit \(1 - \hat F_{\Delta}(1)\) 48.81

What Can We Point Identify in an Experimental Study?

Code
df_exp <- 
    df %>% 
    select(patient, Y, X, D)

df_exp %>%
    head(n = 10) %>%
    kable(digits = 2) %>%
    kable_styling()
patient Y X D
A 7 2.36 1
B 5 0.07 1
C 5 -0.91 1
D 7 1.45 1
E 4 0.84 1
F 1 1.24 0
G 10 0.50 0
H 6 -0.07 0
I 7 0.30 0
J 8 0.05 0

We can plot the empirical CDF of the treated and control groups. Notice in Figure 5 that the curves cross. This provides suggestive evidence that there is heterogeneity in treatment effects.

Code
Y_Tx <- df_exp %>% filter(D==1) %>% pull(Y)
Y_Cx <- df_exp %>% filter(D==0) %>% pull(Y)
D <- df_exp %>% pull(D)
Y <- df_exp %>% pull(Y)

ate <- 
    mean(Y[D==1]) - mean(Y[D==0])

qte_q <- 
  c(0.1,0.25,0.5,0.75,0.9) %>% map_dbl(~(quantile(ecdf(Y_Tx),.x) - quantile(ecdf(Y_Cx),.x)))

p <- 
    df_exp %>% 
    mutate(D = factor(D, labels = c("Cx","Tx")))  %>% 
    ggplot(aes(x = Y, colour = D)) + stat_ecdf() + 
    scale_colour_aaas(name="")  +
    theme(legend.position = "top")
p

Figure 5: eCDFs of Treated and Control Groups

If \(F_{Y_1|D=1}\) and \(F_{Y_0|D=1}\) are identified, we can use the marginal distribution of the outcome among the treated and control units to calculate the Average Treatment Effect on the Treated (ATT) and the Quantile Treatment Effect on the Treated (QTT):

The ATT is given by:

\[ ATT=E\left[Y_{1}-Y_{0} \mid D=1\right] \]

And the QTT is given by:

\[ Q T T(\tau)=F_{Y_{1} \mid D=1}^{-1}(\tau)-F_{Y_{0} \mid D=1}^{-1}(\tau) \]

Code
fig_qtt <- 
  tibble(x = 1:49/50, y =  (1:49/50) %>% map_dbl(~(quantile(ecdf(Y_Tx),.x) - quantile(ecdf(Y_Cx),.x)))) %>% 
  ggplot(aes(x = x, y = y)) + geom_point(size=3,pch=19) +
  labs(x = TeX("$\\tau$"),y=TeX("$\\hat{F}^{-1}_{Y_1|D=1}(\\tau) - \\hat{F}^{-1}_{Y_0|D=1}(\\tau)$")) + 
  geom_hline(aes(yintercept = 0), lwd=1.25) +
   scale_y_continuous(limits = c(-2,8),breaks = seq(-2,8,2))


fig_qtt 

Figure 6: Quantile Treatment Effect on the Treated

We can estimate the ATT and QTT using linear regression (e.g., OLS) and quantile regression, respectively. Figure 7 overlays the estimated ATT (red) and QTT estimates (dark blue) based on OLS and quantile regression fit to the experimental data generated above.

Code
# Source: https://stackoverflow.com/questions/59348163/plotting-quantile-regression-sensitivity-in-ggplot
# OLS
lm <- lm(data= df_exp, 
         formula =  Y ~  D)

ols <- as.data.frame(coef(lm))
ols.ci <- as.data.frame(confint(lm))
ols2 <- cbind(ols, ols.ci)
ols2 <- tibble::rownames_to_column(ols2, var="term") %>% 
  filter(term=="D")

# Quantile Regression
quantreg_fit <- 
  quantreg::rq(Y ~ D, tau = 1:45/50, data = df_exp)

p_exp <- 
  quantreg_fit %>% 
  broom::tidy() %>%
  filter(term=="D") %>% 
  ggplot(aes(x=tau,y=estimate))+
  geom_point(data =  tibble(tau = 1:49/50, estimate =  (1:49/50) %>% map_dbl(~(quantile(ecdf(Y_Tx),.x) - quantile(ecdf(Y_Cx),.x)))),size=3,pch=19) + 
  # quantilie results
  geom_point(color="#27408b", size = 3)+ 
  geom_line(color="#27408b", size = 1)+ 
  geom_ribbon(aes(ymin=conf.low,ymax=conf.high),alpha=0.25, fill="#27408b")+

  # OLS results
  geom_hline(data = ols2, aes(yintercept= `coef(lm)`), lty=1, color="red", size=1)+
  geom_hline(data = ols2, aes(yintercept= `2.5 %`), lty=2, color="red", size=1)+
  geom_hline(data = ols2, aes(yintercept= `97.5 %`), lty=2, color="red", size=1) + 
  geom_hline(aes(yintercept = 0), lwd=1.25) +
  scale_y_continuous(limits = c(-2,8),breaks = seq(-2,8,2))

p_exp 

Figure 7: Average and Quantile Treatment Effect on the Treated

Can We Partially Identify Other Quantities of Interest?

Simulating from a Copula

######################################
## Option 1: Cholesky Decomposition
######################################
# Source: Coping with Copulas Sec 5.1

# 1. Define the correlation matrix
  rho <- 0.6
  sigma <- matrix(c(1,rho,rho,1),nrow = 2, byrow=TRUE)

# 2. Perform a Cholesky decomposition
  A <- chol(sigma)

# 3. Generate iid standard normal pseudo random variables
  tildeY0 <- rnorm(n = 1e6, mean = 0, sd = 1)
  tildeY1 <- rnorm(n = 1e6, mean = 0, sd = 1)
  tildeY <- cbind(tildeY0,tildeY1)

  # Induce the target correlation by applying the Cholesky factor
  tildeY <- tildeY %*% A

# Use the standard normal distribution function to return the quantiles
  U <- pnorm(tildeY)

# Use the inverse distribution function to return the values
  Y <- U 
  Y[,1] <- qpois(U[,1],lambda = 10)
  Y[,2] <- qpois(U[,2],lambda = 12)

# Confirm the correct correlation 
cor(Y[,1],Y[,2])
##########################
## Option 2: Use MVRNORM
##########################
U_ <- pnorm(mvrnorm(n = 1e6, Sigma = sigma, mu = c(0,0)))
Y_ <- U_
Y_[,1] <- qpois(U_[,1],lambda = 10)
Y_[,2] <- qpois(U_[,2],lambda = 12)

cor(Y_[,1],Y_[,2])
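##########################################
## Option 3: Use the copula package (sketch)
##########################################
# Hedged alternative using the copula package loaded above: normalCopula() and
# rCopula() return draws directly on the uniform scale, so no pnorm() step is
# needed. The object names nc, U_cop, and Y_cop are introduced here only.
nc    <- normalCopula(param = rho, dim = 2)
U_cop <- rCopula(1e6, nc)
Y_cop <- cbind(qpois(U_cop[,1], lambda = 10),
               qpois(U_cop[,2], lambda = 12))

cor(Y_cop[,1], Y_cop[,2])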

Sklar (1959) demonstrates that the joint distribution can be represented as

\[ F_{Y_{1}, Y_{0} \mid D=1}\left(y_1, y_0\right)=C_{Y_{1}, Y_{0} \mid D=1}\left(F_{Y_{1} \mid D=1}\left(y_1\right), F_{Y_{0} \mid D=1}\left(y_0\right)\right) \]

The bounding (Frechet-Hoeffding) copulas are given by

\[ M(u_1, u_0)=\min \left\{u_1, u_0\right\} \]

for perfect positive dependence and

\[ W\left(u_1, u_0\right)=\max \left\{u_1+u_0-1,0\right\} \]

for perfect negative dependence, where \(u_1 = F_{Y_1 \mid D=1}(y_1)\) and \(u_0 = F_{Y_0 \mid D=1}(y_0)\).

We can use this to form bounds on the joint distribution \(H(y_1, y_0)\) based on the marginals \(F\) and \(G\):

\[ \max [F(y_1)+G(y_0)-1,0] \leq H(y_1, y_0) \leq \min [F(y_1), G(y_0)] \]
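As a quick illustration (a sketch only, reusing the Y_Tx and Y_Cx vectors defined earlier as stand-ins for the identified marginals; F1_hat, F0_hat, and frechet_bounds are names introduced here), these bounds on the joint CDF can be evaluated directly from the empirical marginals:

Code
# Frechet-Hoeffding bounds on the joint CDF at a point (y1, y0), computed from
# the empirical marginal CDFs of the treated and control outcomes.
F1_hat <- ecdf(Y_Tx)   # stand-in for F_{Y1|D=1}
F0_hat <- ecdf(Y_Cx)   # stand-in for F_{Y0|D=1}

frechet_bounds <- function(y1, y0) {
  c(lower = pmax(F1_hat(y1) + F0_hat(y0) - 1, 0),  # W copula: perfect negative dependence
    upper = pmin(F1_hat(y1), F0_hat(y0)))          # M copula: perfect positive dependence
}

frechet_bounds(y1 = 6, y0 = 6)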

Williamson and Downs (1990) use this relationship to solve for bounds on the distribution of the difference between two random variables:
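In our notation, with \(\Delta = Y_1 - Y_0\) and marginals \(F_{Y_1}\) and \(F_{Y_0}\), these bounds (often called Makarov bounds) take the form

\[ \max \Big[\sup_{y}\big(F_{Y_1}(y)-F_{Y_0}(y-\delta)\big),\, 0\Big] \;\leq\; F_{\Delta}(\delta) \;\leq\; 1+\min \Big[\inf_{y}\big(F_{Y_1}(y)-F_{Y_0}(y-\delta)\big),\, 0\Big] \]

and they are sharp: without further assumptions, no tighter bounds on \(F_{\Delta}\) are available from the marginals alone.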

Worst-Case Bounds

Source: Rank, Jörn, ed. Copulas: From theory to application in finance. Vol. 108. Bloomberg Press, 2007.

Nice slides on copulas.

  • The Frechet-Hoeffding lower bound corresponds to the case of perfect negative dependence (anticorrelation) between the potential outcomes.
  • The upper bound corresponds to the case of perfect positive dependence between the potential outcomes.
  • They are the joint distributions of perfectly negatively dependent and perfectly positively dependent random variables, respectively; see Joe (1997) and Nelsen (1999) for further discussion.

Here is the paper on copulas in econometrics.

Sharp bounds on the joint distribution of the potential outcomes with identified marginals are given by the Fréchet-Hoeffding lower and upper bound distributions; see Heckman and Smith (1993), Heckman, Smith, and Clements (1997), and Manski (1997b) for their applications in program evaluation. For randomized experiments, Heckman, Smith, and Clements (1997) proposed nonparametric estimates of the Fréchet-Hoeffding distribution bounds and developed a test for the “common effect” model by testing the lower bound of the variance of the treatment effect. They also suggested an alternative test based on the difference between the quantile functions of the marginal distributions of the potential outcomes, referred to as the quantile treatment effect (QTE); see Firpo (2005) or Section 2 for more references. Sharp bounds on the distribution of the treatment effect (the difference between two potential outcomes with identified marginals) are known in the probability literature. A.N. Kolmogorov posed the question of finding sharp bounds on the distribution of a sum of two random variables with fixed marginal distributions. It was first solved by Makarov (1981) and later by Rüschendorf (1982) and Frank, Nelsen, and Schweizer (1987) using different techniques. Frank, Nelsen, and Schweizer (1987) showed that their proof based on copulas can be extended to more general functions than the sum. Sharp bounds on the respective distributions of a difference, a product, and a quotient of two random variables with fixed marginals can be found in Williamson and Downs (1990). More recently, Denuit, Genest, and Marceau (1999) extended the bounds for the sum to arbitrary dimensions and provided some applications in finance and risk management; see Embrechts, Höing, and Juri (2003) and McNeil, Frey, and Embrechts (2005) for more discussions and additional references.

Source

(a) Lower Bound

(b) Upper Bound

Figure 8: Copulas for Frechet-Hoeffding Bounds

Figure 9: Bounds on the Cumulative Distribution Function of the Treatment Effect (delta)
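The chunk that constructs these bounds is folded, but a minimal sketch of the Williamson-Downs (Makarov) calculation, reusing the Y_Tx and Y_Cx vectors from above, looks like the following (the folded code that builds the df_wd object plotted below may differ in its details):

Code
# Williamson-Downs (Makarov) bounds on F_Delta(delta), approximated by taking
# the sup/inf over the grid of observed outcome values.
F1_wd <- ecdf(Y_Tx)
F0_wd <- ecdf(Y_Cx)
y_grid <- sort(unique(c(Y_Tx, Y_Cx)))

wd_bounds <- function(delta) {
  diffs <- F1_wd(y_grid) - F0_wd(y_grid - delta)
  c(lower = max(max(diffs), 0),       # sup_y max{F1(y) - F0(y - delta), 0}
    upper = 1 + min(min(diffs), 0))   # 1 + inf_y min{F1(y) - F0(y - delta), 0}
}

wd_bounds(0)   # bounds on Pr(Delta <= 0), i.e., the fraction harmed or unaffected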

Code
#col_scheme <- c("#FF5733" ) # Williamson and Downs

df_wd  %>% 
  mutate(method = factor(method, levels = c("Frandsen & Lefgrens (2021)"     ,       "Frandsen & Lefgrens (2021) Covariates", "Worst-Case\n[Williamson-Downs (1990)]"))) %>% 
  mutate(type = paste0(type,method)) %>% 
  ggplot() + 
  geom_line(aes(x = tau, y = bound, group = type, colour = method),lwd=1.5,colour = "#FF5733") + 
  geom_dl(method = list("last.points",hjust=1),aes(label = label,x = tau, y = bound, group = type, colour = method)) +
  scale_x_continuous(breaks = seq(0,1,0.1)) +
  theme(legend.position = "none")  +
  scale_colour_manual(values = col_scheme[3]) +
  labs(x = TeX("$$\\tau$$"),y=TeX("QoTT($$\\tau$$)")) +
  geom_point(data =  tibble(type = "test", bound = "truth", method = "foo",tau = 1:49/50, estimate =  (1:49/50) %>% map_dbl(~(quantile(ecdf(Y_Tx),.x) - quantile(ecdf(Y_Cx),.x)))),size=3,pch=19,
             aes(x = tau, y = estimate))
  # geom_point(data =  tibble(tau = taus, qtt = QTT, type = "True Values", method = "True Values"), aes(x = tau, y = qtt ),
  #            col="darkred") 

Figure 10: Worst-Case Treatment Effect Distribution Bounds

Frandsen and Lefgren

We’ll next construct bounds based on Frandsen and Lefgren (2021).

Bound Distributions for a Single Y Value

Our first objective is simply to plot the PDF of the untreated group. We’ll obtain this by fitting a smooth, monotone function to the empirical CDF and differentiating it.

gen_pdf <- function(F,x, n.knots = 20) {
    dat <-
        tibble(x, cdf=F(x))
    
    n <- length(dat$x)
    fit <- scam::scam(cdf ~ s(x, bs = "mpi", k = n.knots), data = dat,
                      weights = c(n, rep(1, n - 2), 10 * n))
    ## interior knots
    xk <- with(fit$smooth[[1]], knots[4:(length(knots) - 3)])
    ## spline values at interior knots
    yk <- predict(fit, newdata = data.frame(x = xk))
    ## reparameterization into a monotone interpolation spline
    f <- stats::splinefun(xk, yk, "hyman")
    dat$pdens = f(x,1)
    dat <- dat %>% 
        mutate(dt = c(0,diff(x,1)),
               dy = c(0,diff(cdf,1))) %>% 
        mutate(dydt = dy/dt)
    
    return(dat)
}

Y0 <- 4   # example untreated outcome value to condition on
y0_ <- df %>% filter(D==0) %>% pull(Y) 
y1_ <- df %>% filter(D==1) %>% pull(Y) 

# F0 and F1 below are the CDFs of the untreated and treated outcomes; they are
# constructed in a folded chunk (e.g., via ecdf(y0_) and ecdf(y1_) or the
# smoothed versions above).
df_pdf0 <-
    gen_pdf(F = F0,x = sort(c(Y0,y0_))) %>%
    na.omit()

df_pdf1 <-
    gen_pdf(F = F1,x = sort(c(Y0,y1_))) %>%
    na.omit()

We next need to find the value in the treated distribution (call it yy) that sits at the same quantile that \(Y_0\) occupies in the untreated distribution, i.e., \(yy = F_1^{-1}\big(F_0(Y_0)\big)\).
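A minimal sketch of that mapping is below (the folded chunk that actually defines yy may differ; ecdf() and quantile() stand in for the smoothed CDFs):

Code
# Map Y0's rank in the untreated distribution into the treated distribution.
tau0 <- ecdf(y0_)(Y0)                            # rank of Y0 among untreated outcomes
yy   <- as.numeric(quantile(y1_, probs = tau0))  # treated outcome at that same rank
yy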

Let’s now define conditional PDFs for the best and worst cases.

df_condpdf_worst <- 
    df_pdf1 %>% 
    mutate(pdens = ifelse(x<=yy,pdens,0)) %>% 
    # Normalize so the PDF integrates to one.
    mutate(pdens = (pdens) / sum(pdens * dt))

df_condpdf_best <- 
    df_pdf1 %>% 
    mutate(pdens = ifelse(x>yy,pdens,0)) %>% 
    # Normalize so the PDF integrates to one.
    mutate(pdens = (pdens) / sum(pdens * dt))

f2A <- 
    df_condpdf_worst %>% 
    ggplot(aes(x = x, y = pdens)) + geom_point() #+
        #scale_y_continuous(limits = c(0,0.5))

f2B <- 
    df_condpdf_best %>% 
    ggplot(aes(x = x, y = pdens)) + geom_point() #+
        #scale_y_continuous(limits = c(0,0.5))

Empirical Density for Worst and Best Case

The rightmost panel of the figure shows that, in the best case, an individual with the given \(Y(0)\) value has zero probability of drawing a \(Y(1)\) below their rank in the control state; instead, they draw from the truncated distribution of \(Y(1)\) ranks above their rank in the control state. As noted in the paper, “the best case corresponds to the lower-bound CDF given in equation (1).”
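As a quick illustration of what these truncated densities buy us (a sketch only, using the data frames constructed above; this is not Frandsen and Lefgren's estimator itself), they imply bounds on the conditional mean \(E[Y(1) \mid Y(0) = Y_0]\):

Code
# Riemann-sum approximations of E[Y(1) | Y(0) = Y0] under the worst- and
# best-case truncations (pdens was renormalized above to integrate to one).
ey1_worst <- with(df_condpdf_worst, sum(x * pdens * dt, na.rm = TRUE))
ey1_best  <- with(df_condpdf_best,  sum(x * pdens * dt, na.rm = TRUE))
c(lower = ey1_worst, upper = ey1_best)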

Figure 11: Frandsen-Lefgren (2021) Bounds on the Cumulative Distribution Function of the Treatment Effect (delta)

Figure 12: Worst-Case Bounds on the Average Treatment Effect on the Treated, by Method

Code
#col_scheme <- c("#FF5733" ) # Williamson and Downs

df_wd  %>% 
  bind_rows(df_fl) %>% 
  mutate(method = factor(method, levels = c("Frandsen & Lefgrens (2021)"     ,       "Frandsen & Lefgrens (2021) Covariates", "Worst-Case\n[Williamson-Downs (1990)]"))) %>% 
  mutate(type = paste0(type,method)) %>% 
  ggplot(aes(x = tau, y = bound, group = type)) + 
  geom_line(lwd=1.25,aes(colour = method)) + 
  scale_color_manual(values=col_scheme[c(1,3)]) + 
  geom_dl(method = list("last.points",hjust=1),aes(label = label)) +
  scale_x_continuous(breaks = seq(0,1,0.1)) +
  theme(legend.position = "none")  +
   labs(x = TeX("$$\\tau$$"),y=TeX("QoTT($$\\tau$$)"))

Figure 13: Frandsen-Lefgren Treatment Effect Distribution Bounds

Let’s tighten the bounds further using covariates, following an approach from Frandsen and Lefgren (2021).

Figure 14: Frandsen-Lefgren (2021) Bounds on the Cumulative Distribution Function of the Treatment Effect, With Covariates

Figure 15: Worst-Case Bounds on the Average Treatment Effect on the Treated, by Method

Code
df_wd  %>% 
  bind_rows(df_fl) %>% 
  bind_rows(df_flX) %>% 
  mutate(method = factor(method, levels = c("Frandsen & Lefgrens (2021)"     ,       "Frandsen & Lefgrens (2021)\nWith Covariates", "Worst-Case\n[Williamson-Downs (1990)]"))) %>% 
  mutate(type = paste0(type,method)) %>% 
  ggplot(aes(x = tau, y = bound, group = type)) + 
  geom_line(lwd=1.25,aes(colour = method)) + 
  scale_color_manual(values=col_scheme) + 
  geom_dl(method = list("last.points",hjust=1),aes(label = label,colour = method)) +
  scale_x_continuous(breaks = seq(0,1,0.1)) +
  theme(legend.position = "none")  +
  labs(x = TeX("$$\\tau$$"),y=TeX("QoTT($$\\tau$$)"))
  #annotate("text",x = 0.5, y = 2, label = "True Treatment Effect",vjust=-1,colour = "darkred") 

Figure 16: Frandsen-Lefgren Treatment Effect Distribution Bounds With Covariates

References

Frandsen, Brigham R., and Lars J. Lefgren. 2021. “Partial Identification of the Distribution of Treatment Effects with an Application to the Knowledge Is Power Program (KIPP).” Quantitative Economics 12 (1): 143–71.